---
title: "List experiments"
author: "Dai Yamao"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output: html_document
---

```{r setup, include=FALSE}
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE, fig.width = 10, fig.height = 5)

library(stargazer)
library(tidyverse)
library(stringi)
library(memisc)
library(interplot)
library(ggplot2)
library(dplyr)
library(ggpubr)
library(knitr)
library(kableExtra)
library(broom)
library(purrr)
library(flextable)
library(officer)
library(emmeans)
library(BalanceR)
library(ggeffects)
library(margins)

dat <- read_csv("data/Iraq_election2025.csv")

dat <- dat |>
    mutate(
      treatment = as.numeric(`Q12t_1`),
      control   = as.numeric(`Q12c_1`),
      Treatment = case_when(
        !is.na(treatment) ~ 1,
        !is.na(control)   ~ 0),
      Experiment_score = coalesce(treatment, control),
      Gender = D1,
      Age = D2 + 17,
      Education = D3,
      Income = D5,
      Sect = case_when(D6 == 1 ~ "Sunni",
                       D6 == 2 ~ "Shia",
                       D6 == 3 ~ "Christian",
                       D6 == 4 ~ "Kurd",
                       D6 == 5 ~ "Other"),
      Support_same_ethno_sectarian_origin = Q14_3,
      Voting_pre  = as.numeric(Q8),
      Voting_post = as.numeric(Q11),
      stimulus = as.numeric(Stimulus),
      Stimulus = factor(stimulus, level = c(1, 2),
                        labels = c("Control", "Treatment"))) |> 
      filter(!is.na(Voting_pre), !is.na(Voting_post), !is.na(Stimulus)) 

# ruling parties support means
ruling_parties <- c("Q13_1", "Q13_2", "Q13_4", "Q13_5", "Q13_6")

dat <- dat |> 
  mutate(ruling_support = rowMeans(across(all_of(ruling_parties)), na.rm = TRUE))

# support for each sectarian parties means
shia_parties <- c("Q13_1", "Q13_2", "Q13_3", "Q13_4", "Q13_5", "Q13_6")
dat <- dat |> 
  mutate(support_shia = rowMeans(across(all_of(shia_parties)), na.rm = TRUE))

sunni_parties <- c("Q13_7", "Q13_8", "Q13_9")
dat <- dat |> 
  mutate(support_sunni = rowMeans(across(all_of(sunni_parties)), na.rm = TRUE))

kurd_parties <- c("Q13_11", "Q13_12")
dat <- dat |> 
  mutate(support_kurd = rowMeans(across(all_of(kurd_parties)), na.rm = TRUE))

```

# balance check 
```{r}
dat <- dat |> 
  mutate(sect = D5)
balance <- BalanceR(data = dat, 
                    group = Treatment,
                    cov = c(Gender, Age, Education, Income, sect))
print(balance, digits = 3)
summary(balance, digits = 5)
plot(balance, point.size = 5, text.size = 18)

```

# result
```{r}
result <- dat |> 
  summarise(control_mean = mean(control, na.rm = TRUE), control_SE = sd(dat$control, na.rm = TRUE) / sqrt(length(dat$control)),
            treatment_mean = mean(treatment, na.rm = TRUE), treatment_SE = sd(dat$treatment, na.rm = TRUE) / sqrt(length(dat$treatment)))
result$treatment_mean - result$control_mean

df <- data.frame(
  Experiment = c("Control", "Treatment"),
  Mean = c(result$control_mean, result$treatment_mean),
  SE = c(result$control_SE, result$treatment_SE)
)

ggplot(df, aes(x = Experiment, y = Mean)) +
  geom_point(size = 3, color = "black") + 
  geom_errorbar(aes(ymin = Mean - SE, ymax = Mean + SE), 
                width = 0.2, color = "black") + 
  labs(title = "Result of list experiment", 
       x = "", y = "Mean") +
  theme_bw(base_size = 14)      
```

# DQ gap
```{r}
table(dat$Q15)
dat <- dat |>
  mutate(
    Q15_binary = ifelse(Q15 >= 4, 1, 0)
  )
table(dat$Q15_binary)

# means
means <- dat |> 
  group_by(Treatment) |> 
  summarise(mean_score = mean(Experiment_score, na.rm = TRUE))

control_mean <- means$mean_score[means$Treatment == 0]

dat <- dat |>
  mutate(latent_support = ifelse(Treatment == 1, Experiment_score - control_mean, NA))

# gap and self-censorship score
dat <- dat |>
  mutate(censorship_gap = latent_support - Q15_binary)
summary(dat$censorship_gap)



# plot: result of list and DQ
direct_support <- mean(dat$Q15_binary, na.rm = TRUE)

list_support <- mean(dat$Experiment_score[dat$Treatment == 1], na.rm = TRUE) -
  mean(dat$Experiment_score[dat$Treatment == 0], na.rm = TRUE)

support_df <- tibble(
  Type = c("Direct question", "List experiment (estimated)"),
  SupportRate = c(direct_support, list_support)
)

ggplot(support_df, aes(x = Type, y = SupportRate, fill = Type)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = scales::percent(SupportRate, accuracy = 1)),
            vjust = -0.5, size = 5) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Percentage of people against the election commission revoking candidate eligibility",
    subtitle = "Comparison between list experiment and direct question",
    x = "",
    y = "Support rate"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

# plot: self censorship: + means agree latent, but do not show in DQ openly
ggplot(dat |> filter(!is.na(censorship_gap)), aes(x = censorship_gap)) +
  geom_histogram(binwidth = 0.5, fill = "grey45", color = "grey20", alpha = 0.3) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", linewidth = 0.6) +
  labs(
    title = "Distribution of self-censorship gaps",
    x = "Self-censorship gap (latent support minus stated support)",
    y = "Number"
  ) +
  theme_minimal(base_size = 14)
```



# regression
```{r}
# model 5
reg00 <- lm(formula = Experiment_score ~ Treatment,
            data = dat)
summary(reg00)

# Model 6
reg01 <- lm(formula = Experiment_score ~ Treatment + 
              Gender + Age + Education + Income, data = dat)
summary(reg01)

# Model8
reg07 <- lm(formula = Experiment_score ~ Treatment * support_sunni + 
              Gender + Age + Education + Income, data = dat)
summary(reg07)

# Model 7
reg007 <- lm(formula = Experiment_score ~ Treatment * support_sunni,
             data = dat)
summary(reg007)


# marginal effect of reg07
p <- ggpredict(reg07, terms = c("support_sunni [all]", "Treatment"))

ggplot(p, aes(x = x, y = predicted, color = group)) +
  geom_line(size = 1.2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.15, color = NA) +
  labs(
    x = "Support for Sunni parties",
    y = "Predicted Experiment Score",
    color = "Treatment",
    fill  = "Treatment",
    title = "Marginal Effects of Support for Sunni Parties (Interaction Plot)"
  ) +
  theme_minimal(base_size = 14)

```

# regression table
```{r}
table2 <- mtable("Model 5" = reg00, "Model 6" = reg01,
                "Model 7" = reg007, "Model 8" = reg07,
               summary.stats=c("adj. R-squared","F","p", "N"))
print(table2)
write_html(table2, file = "result/Table2.html")
```

# statistics table
```{r}
dat_desc <- dat |>
  dplyr::select(
    Experiment_score,
    Treatment,
    support_sunni
  )

psych::describe(dat_desc)[, c("n", "mean", "sd", "min", "max")]
```
